library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
-- Attaching packages -------------------------------------------------------------------- tidyverse 1.3.1 --
√ ggplot2 3.3.5 √ purrr 0.3.4
√ tibble 3.1.6 √ dplyr 1.0.8
√ tidyr 1.2.0 √ stringr 1.4.0
√ readr 2.1.2 √ forcats 0.5.1
-- Conflicts ----------------------------------------------------------------------- tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag() masks stats::lag()
library(readxl)
Warning: package ‘readxl’ was built under R version 4.1.3
# Load the three raw survey workbooks (2015, 2016, 2017).
# The paths are relative, so they resolve against the current working directory.
candy_2015 <- read_excel(path = "../raw_data/boing-boing-candy-2015.xlsx")
candy_2016 <- read_excel(path = "../raw_data/boing-boing-candy-2016.xlsx")
candy_2017 <- read_excel(path = "../raw_data/boing-boing-candy-2017.xlsx")
New names:
* `` -> ...114
# Print the project root that the here package resolves to. This is a sanity
# check only: the result is not assigned, and the loads above use relative paths.
here::here()
[1] "C:/Users/mahri/OneDrive/CodeClan/dirty_data_project/dirty_data_codeclan_project_mahri/dirty_data_task_4_mahri"
From glimpse
# First rows of the raw 2015 sheet, then the column names/types of the raw
# 2016 sheet.
candy_2015 %>%
  head()
glimpse(x = candy_2016)
Rows: 1,259
Columns: 123
$ Timestamp <dttm> ~
$ `Are you going actually going trick or treating yourself?` <chr> ~
$ `Your gender:` <chr> ~
$ `How old are you?` <chr> ~
$ `Which country do you live in?` <chr> ~
$ `Which state, province, county do you live in?` <chr> ~
$ `[100 Grand Bar]` <chr> ~
$ `[Anonymous brown globs that come in black and orange wrappers]` <chr> ~
$ `[Any full-sized candy bar]` <chr> ~
$ `[Black Jacks]` <chr> ~
$ `[Bonkers (the candy)]` <chr> ~
$ `[Bonkers (the board game)]` <chr> ~
$ `[Bottle Caps]` <chr> ~
$ `[Box'o'Raisins]` <chr> ~
$ `[Broken glow stick]` <chr> ~
$ `[Butterfinger]` <chr> ~
$ `[Cadbury Creme Eggs]` <chr> ~
$ `[Candy Corn]` <chr> ~
$ `[Candy that is clearly just the stuff given out for free at restaurants]` <chr> ~
$ `[Caramellos]` <chr> ~
$ `[Cash, or other forms of legal tender]` <chr> ~
$ `[Chardonnay]` <chr> ~
$ `[Chick-o-Sticks (we don’t know what that is)]` <chr> ~
$ `[Chiclets]` <chr> ~
$ `[Coffee Crisp]` <chr> ~
$ `[Creepy Religious comics/Chick Tracts]` <chr> ~
$ `[Dental paraphenalia]` <chr> ~
$ `[Dots]` <chr> ~
$ `[Dove Bars]` <chr> ~
$ `[Fuzzy Peaches]` <chr> ~
$ `[Generic Brand Acetaminophen]` <chr> ~
$ `[Glow sticks]` <chr> ~
$ `[Goo Goo Clusters]` <chr> ~
$ `[Good N' Plenty]` <chr> ~
$ `[Gum from baseball cards]` <chr> ~
$ `[Gummy Bears straight up]` <chr> ~
$ `[Hard Candy]` <chr> ~
$ `[Healthy Fruit]` <chr> ~
$ `[Heath Bar]` <chr> ~
$ `[Hershey's Dark Chocolate]` <chr> ~
$ `[Hershey’s Milk Chocolate]` <chr> ~
$ `[Hershey's Kisses]` <chr> ~
$ `[Hugs (actual physical hugs)]` <chr> ~
$ `[Jolly Rancher (bad flavor)]` <chr> ~
$ `[Jolly Ranchers (good flavor)]` <chr> ~
$ `[JoyJoy (Mit Iodine!)]` <chr> ~
$ `[Junior Mints]` <chr> ~
$ `[Senior Mints]` <chr> ~
$ `[Kale smoothie]` <chr> ~
$ `[Kinder Happy Hippo]` <chr> ~
$ `[Kit Kat]` <chr> ~
$ `[LaffyTaffy]` <chr> ~
$ `[LemonHeads]` <chr> ~
$ `[Licorice (not black)]` <chr> ~
$ `[Licorice (yes black)]` <chr> ~
$ `[Lindt Truffle]` <chr> ~
$ `[Lollipops]` <chr> ~
$ `[Mars]` <chr> ~
$ `[Mary Janes]` <chr> ~
$ `[Maynards]` <chr> ~
$ `[Mike and Ike]` <chr> ~
$ `[Milk Duds]` <chr> ~
$ `[Milky Way]` <chr> ~
$ `[Regular M&Ms]` <chr> ~
$ `[Peanut M&M’s]` <chr> ~
$ `[Blue M&M's]` <chr> ~
$ `[Red M&M's]` <chr> ~
$ `[Third Party M&M's]` <chr> ~
$ `[Minibags of chips]` <chr> ~
$ `[Mint Kisses]` <chr> ~
$ `[Mint Juleps]` <chr> ~
$ `[Mr. Goodbar]` <chr> ~
$ `[Necco Wafers]` <chr> ~
$ `[Nerds]` <chr> ~
$ `[Nestle Crunch]` <chr> ~
$ `[Now'n'Laters]` <chr> ~
$ `[Peeps]` <chr> ~
$ `[Pencils]` <chr> ~
$ `[Person of Interest Season 3 DVD Box Set (not including Disc 4 with hilarious outtakes)]` <chr> ~
$ `[Pixy Stix]` <chr> ~
$ `[Reese’s Peanut Butter Cups]` <chr> ~
$ `[Reese's Pieces]` <chr> ~
$ `[Reggie Jackson Bar]` <chr> ~
$ `[Rolos]` <chr> ~
$ `[Skittles]` <chr> ~
$ `[Smarties (American)]` <chr> ~
$ `[Smarties (Commonwealth)]` <chr> ~
$ `[Snickers]` <chr> ~
$ `[Sourpatch Kids (i.e. abominations of nature)]` <chr> ~
$ `[Spotted Dick]` <chr> ~
$ `[Starburst]` <chr> ~
$ `[Sweet Tarts]` <chr> ~
$ `[Swedish Fish]` <chr> ~
$ `[Sweetums (a friend to diabetes)]` <chr> ~
$ `[Tic Tacs]` <chr> ~
$ `[Those odd marshmallow circus peanut things]` <chr> ~
$ `[Three Musketeers]` <chr> ~
$ `[Tolberone something or other]` <chr> ~
$ `[Trail Mix]` <chr> ~
$ `[Twix]` <chr> ~
$ `[Vials of pure high fructose corn syrup, for main-lining into your vein]` <chr> ~
$ `[Vicodin]` <chr> ~
$ `[Whatchamacallit Bars]` <chr> ~
$ `[White Bread]` <chr> ~
$ `[Whole Wheat anything]` <chr> ~
$ `[York Peppermint Patties]` <chr> ~
$ `Please list any items not included above that give you JOY.` <chr> ~
$ `Please list any items not included above that give you DESPAIR.` <chr> ~
$ `Please leave any witty, snarky or thoughtful remarks or comments regarding your choices.` <chr> ~
$ `Guess the number of mints in my hand.` <chr> ~
$ `Betty or Veronica?` <chr> ~
$ `"That dress* that went viral a few years back - when I first saw it, it was ________"` <chr> ~
$ `What is your favourite font?` <chr> ~
$ `Please estimate the degree(s) of separation you have from the following celebrities [JK Rowling]` <chr> ~
$ `Please estimate the degree(s) of separation you have from the following celebrities [JJ Abrams]` <chr> ~
$ `Please estimate the degree(s) of separation you have from the following celebrities [Beyoncé]` <chr> ~
$ `Please estimate the degree(s) of separation you have from the following celebrities [Bieber]` <chr> ~
$ `Please estimate the degree(s) of separation you have from the following celebrities [Kevin Bacon]` <chr> ~
$ `Please estimate the degree(s) of separation you have from the following celebrities [Francis Bacon (1561 - 1626)]` <chr> ~
$ `Which day do you prefer, Friday or Sunday?` <chr> ~
$ `Do you eat apples the correct way, East to West (side to side) or do you eat them like a freak of nature, South to North (bottom to top)?` <chr> ~
$ `When you see the above image of the 4 different websites, which one would you most likely check out (please be honest).` <chr> ~
$ `[York Peppermint Patties] Ignore` <lgl> ~
# Column names and types of the raw 2017 sheet (the column headers carry
# "Q1:" ... "Q12:" question prefixes, unlike the 2016 sheet).
glimpse(candy_2017)
Rows: 2,460
Columns: 120
$ `Internal ID` <dbl> 90258773, ~
$ `Q1: GOING OUT?` <chr> NA, "No", ~
$ `Q2: GENDER` <chr> NA, "Male"~
$ `Q3: AGE` <chr> NA, "44", ~
$ `Q4: COUNTRY` <chr> NA, "USA",~
$ `Q5: STATE, PROVINCE, COUNTY, ETC` <chr> NA, "NM", ~
$ `Q6 | 100 Grand Bar` <chr> NA, "MEH",~
$ `Q6 | Anonymous brown globs that come in black and orange wrappers\t(a.k.a. Mary Janes)` <chr> NA, "DESPA~
$ `Q6 | Any full-sized candy bar` <chr> NA, "JOY",~
$ `Q6 | Black Jacks` <chr> NA, "MEH",~
$ `Q6 | Bonkers (the candy)` <chr> NA, "DESPA~
$ `Q6 | Bonkers (the board game)` <chr> NA, "DESPA~
$ `Q6 | Bottle Caps` <chr> NA, "DESPA~
$ `Q6 | Box'o'Raisins` <chr> NA, "DESPA~
$ `Q6 | Broken glow stick` <chr> NA, "DESPA~
$ `Q6 | Butterfinger` <chr> NA, "DESPA~
$ `Q6 | Cadbury Creme Eggs` <chr> NA, "MEH",~
$ `Q6 | Candy Corn` <chr> NA, "MEH",~
$ `Q6 | Candy that is clearly just the stuff given out for free at restaurants` <chr> NA, "DESPA~
$ `Q6 | Caramellos` <chr> NA, "MEH",~
$ `Q6 | Cash, or other forms of legal tender` <chr> NA, "JOY",~
$ `Q6 | Chardonnay` <chr> NA, "MEH",~
$ `Q6 | Chick-o-Sticks (we don’t know what that is)` <chr> NA, "DESPA~
$ `Q6 | Chiclets` <chr> NA, "DESPA~
$ `Q6 | Coffee Crisp` <chr> NA, "DESPA~
$ `Q6 | Creepy Religious comics/Chick Tracts` <chr> NA, "DESPA~
$ `Q6 | Dental paraphenalia` <chr> NA, "DESPA~
$ `Q6 | Dots` <chr> NA, "MEH",~
$ `Q6 | Dove Bars` <chr> NA, "JOY",~
$ `Q6 | Fuzzy Peaches` <chr> NA, "DESPA~
$ `Q6 | Generic Brand Acetaminophen` <chr> NA, "DESPA~
$ `Q6 | Glow sticks` <chr> NA, "DESPA~
$ `Q6 | Goo Goo Clusters` <chr> NA, "DESPA~
$ `Q6 | Good N' Plenty` <chr> NA, "MEH",~
$ `Q6 | Gum from baseball cards` <chr> NA, "DESPA~
$ `Q6 | Gummy Bears straight up` <chr> NA, "MEH",~
$ `Q6 | Hard Candy` <chr> NA, "MEH",~
$ `Q6 | Healthy Fruit` <chr> NA, "DESPA~
$ `Q6 | Heath Bar` <chr> NA, "MEH",~
$ `Q6 | Hershey's Dark Chocolate` <chr> NA, "JOY",~
$ `Q6 | Hershey’s Milk Chocolate` <chr> NA, "JOY",~
$ `Q6 | Hershey's Kisses` <chr> NA, "MEH",~
$ `Q6 | Hugs (actual physical hugs)` <chr> NA, "DESPA~
$ `Q6 | Jolly Rancher (bad flavor)` <chr> NA, "DESPA~
$ `Q6 | Jolly Ranchers (good flavor)` <chr> NA, "MEH",~
$ `Q6 | JoyJoy (Mit Iodine!)` <chr> NA, "DESPA~
$ `Q6 | Junior Mints` <chr> NA, "DESPA~
$ `Q6 | Senior Mints` <chr> NA, "DESPA~
$ `Q6 | Kale smoothie` <chr> NA, "DESPA~
$ `Q6 | Kinder Happy Hippo` <chr> NA, "DESPA~
$ `Q6 | Kit Kat` <chr> NA, "JOY",~
$ `Q6 | LaffyTaffy` <chr> NA, "DESPA~
$ `Q6 | LemonHeads` <chr> NA, "MEH",~
$ `Q6 | Licorice (not black)` <chr> NA, "MEH",~
$ `Q6 | Licorice (yes black)` <chr> NA, "JOY",~
$ `Q6 | Lindt Truffle` <chr> NA, "MEH",~
$ `Q6 | Lollipops` <chr> NA, "DESPA~
$ `Q6 | Mars` <chr> NA, "DESPA~
$ `Q6 | Maynards` <chr> NA, "DESPA~
$ `Q6 | Mike and Ike` <chr> NA, "MEH",~
$ `Q6 | Milk Duds` <chr> NA, "MEH",~
$ `Q6 | Milky Way` <chr> NA, "JOY",~
$ `Q6 | Regular M&Ms` <chr> NA, "JOY",~
$ `Q6 | Peanut M&M’s` <chr> NA, "MEH",~
$ `Q6 | Blue M&M's` <chr> NA, "JOY",~
$ `Q6 | Red M&M's` <chr> NA, "JOY",~
$ `Q6 | Green Party M&M's` <chr> NA, "JOY",~
$ `Q6 | Independent M&M's` <chr> NA, "JOY",~
$ `Q6 | Abstained from M&M'ing.` <chr> NA, "DESPA~
$ `Q6 | Minibags of chips` <chr> NA, "DESPA~
$ `Q6 | Mint Kisses` <chr> NA, "MEH",~
$ `Q6 | Mint Juleps` <chr> NA, "DESPA~
$ `Q6 | Mr. Goodbar` <chr> NA, "DESPA~
$ `Q6 | Necco Wafers` <chr> NA, "DESPA~
$ `Q6 | Nerds` <chr> NA, "DESPA~
$ `Q6 | Nestle Crunch` <chr> NA, "JOY",~
$ `Q6 | Now'n'Laters` <chr> NA, "DESPA~
$ `Q6 | Peeps` <chr> NA, "DESPA~
$ `Q6 | Pencils` <chr> NA, "DESPA~
$ `Q6 | Pixy Stix` <chr> NA, "DESPA~
$ `Q6 | Real Housewives of Orange County Season 9 Blue-Ray` <chr> NA, "DESPA~
$ `Q6 | Reese’s Peanut Butter Cups` <chr> NA, "JOY",~
$ `Q6 | Reese's Pieces` <chr> NA, "JOY",~
$ `Q6 | Reggie Jackson Bar` <chr> NA, "DESPA~
$ `Q6 | Rolos` <chr> NA, "JOY",~
$ `Q6 | Sandwich-sized bags filled with BooBerry Crunch` <chr> NA, "DESPA~
$ `Q6 | Skittles` <chr> NA, "DESPA~
$ `Q6 | Smarties (American)` <chr> NA, "DESPA~
$ `Q6 | Smarties (Commonwealth)` <chr> NA, "DESPA~
$ `Q6 | Snickers` <chr> NA, "MEH",~
$ `Q6 | Sourpatch Kids (i.e. abominations of nature)` <chr> NA, "DESPA~
$ `Q6 | Spotted Dick` <chr> NA, "DESPA~
$ `Q6 | Starburst` <chr> NA, "MEH",~
$ `Q6 | Sweet Tarts` <chr> NA, "DESPA~
$ `Q6 | Swedish Fish` <chr> NA, "MEH",~
$ `Q6 | Sweetums (a friend to diabetes)` <chr> NA, "DESPA~
$ `Q6 | Take 5` <chr> NA, "DESPA~
$ `Q6 | Tic Tacs` <chr> NA, "DESPA~
$ `Q6 | Those odd marshmallow circus peanut things` <chr> NA, "DESPA~
$ `Q6 | Three Musketeers` <chr> NA, "JOY",~
$ `Q6 | Tolberone something or other` <chr> NA, "JOY",~
$ `Q6 | Trail Mix` <chr> NA, "DESPA~
$ `Q6 | Twix` <chr> NA, "JOY",~
$ `Q6 | Vials of pure high fructose corn syrup, for main-lining into your vein` <chr> NA, "DESPA~
$ `Q6 | Vicodin` <chr> NA, "DESPA~
$ `Q6 | Whatchamacallit Bars` <chr> NA, "DESPA~
$ `Q6 | White Bread` <chr> NA, "DESPA~
$ `Q6 | Whole Wheat anything` <chr> NA, "DESPA~
$ `Q6 | York Peppermint Patties` <chr> NA, "DESPA~
$ `Q7: JOY OTHER` <chr> NA, "Mound~
$ `Q8: DESPAIR OTHER` <chr> NA, NA, NA~
$ `Q9: OTHER COMMENTS` <chr> NA, "Botto~
$ `Q10: DRESS` <chr> NA, "White~
$ ...114 <chr> NA, NA, NA~
$ `Q11: DAY` <chr> NA, "Sunda~
$ `Q12: MEDIA [Daily Dish]` <dbl> NA, NA, NA~
$ `Q12: MEDIA [Science]` <dbl> NA, 1, NA,~
$ `Q12: MEDIA [ESPN]` <dbl> NA, NA, NA~
$ `Q12: MEDIA [Yahoo]` <dbl> NA, NA, NA~
$ `Click Coordinates (x, y)` <chr> NA, "(84, ~
library(janitor)
Attaching package: ‘janitor’
The following objects are masked from ‘package:stats’:
chisq.test, fisher.test
# Standardise the column names of each year with janitor::clean_names(),
# keeping the raw tibbles untouched, and print each result for inspection.
janitor_candy_2015 <- candy_2015 %>%
  janitor::clean_names()
janitor_candy_2015

janitor_candy_2016 <- candy_2016 %>%
  janitor::clean_names()
janitor_candy_2016

janitor_candy_2017 <- candy_2017 %>%
  janitor::clean_names()
janitor_candy_2017
just looking at who is reporting back about these ones…
# Exploratory only: show age and gender next to two of the 2017 M&M columns,
# and show the 2016 "ignore" column, to see who answered them.
select(
  janitor_candy_2017,
  q3_age,
  q2_gender,
  q6_independent_m_ms,
  q6_green_party_m_ms
)

select(janitor_candy_2016, york_peppermint_patties_ignore)
# nobody
2015 clean - remove columns that aren’t candy (see readme). I went from bottom to top so that I could check each column index as I went. Also adding a “year” column and removing “timestamp” for the join (see if I can work out extracting the year from the timestamp and moving it over later). Adding “year” after the removal so that the column indices aren’t shifted.
# List the cleaned 2015 column names with their positions, used to decide
# which columns to drop below.
colnames(janitor_candy_2015)
[1] "timestamp"
[2] "how_old_are_you"
[3] "are_you_going_actually_going_trick_or_treating_yourself"
[4] "butterfinger"
[5] "x100_grand_bar"
[6] "anonymous_brown_globs_that_come_in_black_and_orange_wrappers"
[7] "any_full_sized_candy_bar"
[8] "black_jacks"
[9] "bonkers"
[10] "bottle_caps"
[11] "box_o_raisins"
[12] "brach_products_not_including_candy_corn"
[13] "bubble_gum"
[14] "cadbury_creme_eggs"
[15] "candy_corn"
[16] "vials_of_pure_high_fructose_corn_syrup_for_main_lining_into_your_vein"
[17] "candy_that_is_clearly_just_the_stuff_given_out_for_free_at_restaurants"
[18] "cash_or_other_forms_of_legal_tender"
[19] "chiclets"
[20] "caramellos"
[21] "snickers"
[22] "dark_chocolate_hershey"
[23] "dental_paraphenalia"
[24] "dots"
[25] "fuzzy_peaches"
[26] "generic_brand_acetaminophen"
[27] "glow_sticks"
[28] "broken_glow_stick"
[29] "goo_goo_clusters"
[30] "good_n_plenty"
[31] "gum_from_baseball_cards"
[32] "gummy_bears_straight_up"
[33] "creepy_religious_comics_chick_tracts"
[34] "healthy_fruit"
[35] "heath_bar"
[36] "hershey_s_kissables"
[37] "hershey_s_milk_chocolate"
[38] "hugs_actual_physical_hugs"
[39] "jolly_rancher_bad_flavor"
[40] "jolly_ranchers_good_flavor"
[41] "kale_smoothie"
[42] "kinder_happy_hippo"
[43] "kit_kat"
[44] "hard_candy"
[45] "lapel_pins"
[46] "lemon_heads"
[47] "licorice"
[48] "licorice_not_black"
[49] "lindt_truffle"
[50] "lollipops"
[51] "mars"
[52] "mary_janes"
[53] "maynards"
[54] "milk_duds"
[55] "laffy_taffy"
[56] "minibags_of_chips"
[57] "joy_joy_mit_iodine"
[58] "reggie_jackson_bar"
[59] "pixy_stix"
[60] "nerds"
[61] "nestle_crunch"
[62] "nown_laters"
[63] "pencils"
[64] "milky_way"
[65] "reese_s_peanut_butter_cups"
[66] "tolberone_something_or_other"
[67] "runts"
[68] "junior_mints"
[69] "senior_mints"
[70] "mint_kisses"
[71] "mint_juleps"
[72] "mint_leaves"
[73] "peanut_m_m_s"
[74] "regular_m_ms"
[75] "mint_m_ms"
[76] "ribbon_candy"
[77] "rolos"
[78] "skittles"
[79] "smarties_american"
[80] "smarties_commonwealth"
[81] "chick_o_sticks_we_don_t_know_what_that_is"
[82] "spotted_dick"
[83] "starburst"
[84] "swedish_fish"
[85] "sweetums"
[86] "those_odd_marshmallow_circus_peanut_things"
[87] "three_musketeers"
[88] "peterson_brand_sidewalk_chalk"
[89] "peanut_butter_bars"
[90] "peanut_butter_jars"
[91] "trail_mix"
[92] "twix"
[93] "vicodin"
[94] "white_bread"
[95] "whole_wheat_anything"
[96] "york_peppermint_patties"
[97] "please_leave_any_remarks_or_comments_regarding_your_choices"
[98] "please_list_any_items_not_included_above_that_give_you_joy"
[99] "please_list_any_items_not_included_above_that_give_you_despair"
[100] "guess_the_number_of_mints_in_my_hand"
[101] "betty_or_veronica"
[102] "check_all_that_apply_i_cried_tears_of_sadness_at_the_end_of"
[103] "that_dress_that_went_viral_early_this_year_when_i_first_saw_it_it_was"
[104] "fill_in_the_blank_taylor_swift_is_a_force_for"
[105] "what_is_your_favourite_font"
[106] "if_you_squint_really_hard_the_words_intelligent_design_would_look_like"
[107] "fill_in_the_blank_imitation_is_a_form_of"
[108] "please_estimate_the_degree_s_of_separation_you_have_from_the_following_celebrities_jk_rowling"
[109] "please_estimate_the_degree_s_of_separation_you_have_from_the_following_celebrities_jj_abrams"
[110] "please_estimate_the_degree_s_of_separation_you_have_from_the_following_celebrities_beyonce"
[111] "please_estimate_the_degree_s_of_separation_you_have_from_the_following_celebrities_bieber"
[112] "please_estimate_the_degree_s_of_separation_you_have_from_the_following_celebrities_kevin_bacon"
[113] "please_estimate_the_degree_s_of_separation_you_have_from_the_following_celebrities_francis_bacon_1561_1626"
[114] "sea_salt_flavored_stuff_probably_chocolate_since_this_is_the_it_flavor_of_the_year"
[115] "necco_wafers"
[116] "which_day_do_you_prefer_friday_or_sunday"
[117] "please_estimate_the_degrees_of_separation_you_have_from_the_following_folks_bruce_lee"
[118] "please_estimate_the_degrees_of_separation_you_have_from_the_following_folks_jk_rowling"
[119] "please_estimate_the_degrees_of_separation_you_have_from_the_following_folks_malala_yousafzai"
[120] "please_estimate_the_degrees_of_separation_you_have_from_the_following_folks_thom_yorke"
[121] "please_estimate_the_degrees_of_separation_you_have_from_the_following_folks_jj_abrams"
[122] "please_estimate_the_degrees_of_separation_you_have_from_the_following_folks_hillary_clinton"
[123] "please_estimate_the_degrees_of_separation_you_have_from_the_following_folks_donald_trump"
[124] "please_estimate_the_degrees_of_separation_you_have_from_the_following_folks_beyonce_knowles"
# 2015: drop every non-candy column plus the timestamp, then tag the rows with
# the survey year so the three years can be row-bound later.
#
# Columns are dropped by NAME (single names or name ranges) instead of by
# position, so the selection still removes the intended columns if the
# workbook's column order ever changes.
col_removed_candy_2015 <- janitor_candy_2015 %>%
  select(
    # trailing questionnaire block (previously positions 116:124)
    -(which_day_do_you_prefer_friday_or_sunday:
        please_estimate_the_degrees_of_separation_you_have_from_the_following_folks_beyonce_knowles),
    # free-text and trivia questions (previously positions 97:113); the two
    # candy columns that follow this range (sea salt, necco wafers) are kept
    -(please_leave_any_remarks_or_comments_regarding_your_choices:
        please_estimate_the_degree_s_of_separation_you_have_from_the_following_celebrities_francis_bacon_1561_1626),
    # previously positions 93:95
    -(vicodin:whole_wheat_anything),
    # previously positions 90 and 91
    -c(peanut_butter_jars, trail_mix),
    # remaining non-candy items, already dropped by name in the original
    -c(
      peterson_brand_sidewalk_chalk,
      spotted_dick,
      mint_leaves,
      joy_joy_mit_iodine,
      minibags_of_chips,
      lapel_pins,
      kale_smoothie,
      hugs_actual_physical_hugs,
      heath_bar,
      healthy_fruit,
      creepy_religious_comics_chick_tracts,
      broken_glow_stick,
      glow_sticks,
      generic_brand_acetaminophen,
      dental_paraphenalia,
      cash_or_other_forms_of_legal_tender,
      vials_of_pure_high_fructose_corn_syrup_for_main_lining_into_your_vein,
      box_o_raisins,
      timestamp
    )
  ) %>%
  add_column(year = "2015", .before = 1)
col_removed_candy_2015
# view(col_removed_candy_2015)
2015 clean - renaming columns so they match the other years. I considered merging “anonymous brown globs” with mary_janes (in 2017 the column is “anon brown… a.k.a. Mary Janes”), but looking at the responses they don’t match up, so I’ll leave it as anonymous_black_and_orange_wrapper.
# Side-by-side look at the two columns that were candidates for merging.
select(
  col_removed_candy_2015,
  anonymous_brown_globs_that_come_in_black_and_orange_wrappers,
  mary_janes
)

# Shorten and harmonise the 2015 column names so they line up with the names
# used for the other survey years.
candy_2015_renamed <- rename(
  col_removed_candy_2015,
  age = how_old_are_you,
  trick_or_treating = are_you_going_actually_going_trick_or_treating_yourself,
  anonymous_black_and_orange_wrapper = anonymous_brown_globs_that_come_in_black_and_orange_wrappers,
  brach_not_including_candy_corn = brach_products_not_including_candy_corn,
  restaurant_candy = candy_that_is_clearly_just_the_stuff_given_out_for_free_at_restaurants,
  hersheys_dark_chocolate = dark_chocolate_hershey,
  gummy_bears = gummy_bears_straight_up,
  hersheys_kissables = hershey_s_kissables,
  hersheys_milk_chocolate = hershey_s_milk_chocolate,
  licorice_black = licorice,
  reeses_peanut_butter_cups = reese_s_peanut_butter_cups,
  toblerone = tolberone_something_or_other,
  peanut_m_ms = peanut_m_m_s,
  chick_o_stick = chick_o_sticks_we_don_t_know_what_that_is,
  circus_peanuts = those_odd_marshmallow_circus_peanut_things,
  sea_salt_chocolate = sea_salt_flavored_stuff_probably_chocolate_since_this_is_the_it_flavor_of_the_year
)
2016 clean - remove unnecessary columns (working bottom to top). Also adding a year column for the binding of rows.
# 2016: drop the non-candy columns and the timestamp, then tag the rows with
# the survey year for the later row-bind.
#
# Columns are dropped by NAME (single names or a name range) instead of by
# position, so the selection still removes the intended columns if the
# workbook's column order ever changes.
col_removed_candy_2016 <- janitor_candy_2016 %>%
  select(
    # previously positions 104 and 105
    -c(white_bread, whole_wheat_anything),
    # everything after york_peppermint_patties: free-text questions, trivia
    # and the "ignore" column (previously positions 107:123)
    -(please_list_any_items_not_included_above_that_give_you_joy:
        york_peppermint_patties_ignore),
    # remaining non-candy items, already dropped by name in the original
    -c(
      vicodin,
      vials_of_pure_high_fructose_corn_syrup_for_main_lining_into_your_vein,
      trail_mix,
      spotted_dick,
      person_of_interest_season_3_dvd_box_set_not_including_disc_4_with_hilarious_outtakes,
      minibags_of_chips,
      kale_smoothie,
      joy_joy_mit_iodine,
      hugs_actual_physical_hugs,
      heath_bar,
      healthy_fruit,
      glow_sticks,
      generic_brand_acetaminophen,
      dental_paraphenalia,
      creepy_religious_comics_chick_tracts,
      chardonnay,
      cash_or_other_forms_of_legal_tender,
      broken_glow_stick,
      boxo_raisins,
      bonkers_the_board_game,
      timestamp
    )
  ) %>%
  add_column(year = "2016", .before = 1)
2016 - rename columns so they match the other sheets. I considered merging “anonymous brown globs” with mary_janes (in 2017 the column is “anon brown… a.k.a. Mary Janes”), but looking at the responses they don’t match up, so I’ll leave it as anonymous_black_and_orange_wrapper.
# Side-by-side look at the two columns that were candidates for merging.
select(
  col_removed_candy_2016,
  anonymous_brown_globs_that_come_in_black_and_orange_wrappers,
  mary_janes
)

# Shorten and harmonise the 2016 column names so they line up with the names
# used for the other survey years.
candy_2016_renamed <- rename(
  col_removed_candy_2016,
  trick_or_treating = are_you_going_actually_going_trick_or_treating_yourself,
  gender = your_gender,
  age = how_old_are_you,
  country = which_country_do_you_live_in,
  state_or_prov = which_state_province_county_do_you_live_in,
  anonymous_black_and_orange_wrapper = anonymous_brown_globs_that_come_in_black_and_orange_wrappers,
  bonkers = bonkers_the_candy,
  restaurant_candy = candy_that_is_clearly_just_the_stuff_given_out_for_free_at_restaurants,
  chick_o_stick = chick_o_sticks_we_don_t_know_what_that_is,
  gummy_bears = gummy_bears_straight_up,
  hersheys_milk_chocolate = hershey_s_milk_chocolate,
  licorice_black = licorice_yes_black,
  peanut_m_ms = peanut_m_m_s,
  party_bag_m_ms = third_party_m_ms,
  reeses_peanut_butter_cups = reese_s_peanut_butter_cups,
  sourpatch_kids = sourpatch_kids_i_e_abominations_of_nature,
  sweetarts = sweet_tarts,
  sweetums = sweetums_a_friend_to_diabetes,
  circus_peanuts = those_odd_marshmallow_circus_peanut_things,
  toblerone = tolberone_something_or_other
)
candy_2016_renamed
NA
2017 clean - remove the columns that are not candy, plus internal_id (the other years don’t have it). Also adding a year column for the join.
# 2017: drop the non-candy columns and internal_id (the other years have no
# such column), then tag the rows with the survey year for the later row-bind.
#
# Columns are dropped by NAME (single names or a name range) instead of by
# position, so the selection still removes the intended columns if the
# workbook's column order ever changes.
col_removed_candy_2017 <- janitor_candy_2017 %>%
  select(
    # previously positions 102, 104, 105, 107 and 108
    -c(
      q6_trail_mix,
      q6_vials_of_pure_high_fructose_corn_syrup_for_main_lining_into_your_vein,
      q6_vicodin,
      q6_white_bread,
      q6_whole_wheat_anything
    ),
    # Q7 onwards: free-text, dress, day, media and click-coordinate columns
    # (previously positions 110:120, i.e. through the last column)
    -(q7_joy_other:last_col()),
    # remaining non-candy items, already dropped by name in the original
    -c(
      q6_spotted_dick,
      q6_sandwich_sized_bags_filled_with_boo_berry_crunch,
      q6_real_housewives_of_orange_county_season_9_blue_ray,
      q6_minibags_of_chips,
      q6_abstained_from_m_ming,
      q6_kale_smoothie,
      q6_joy_joy_mit_iodine,
      q6_hugs_actual_physical_hugs,
      q6_heath_bar,
      q6_healthy_fruit,
      q6_glow_sticks,
      q6_generic_brand_acetaminophen,
      q6_dental_paraphenalia,
      q6_creepy_religious_comics_chick_tracts,
      q6_chardonnay,
      q6_cash_or_other_forms_of_legal_tender,
      q6_broken_glow_stick,
      q6_boxo_raisins,
      q6_bonkers_the_board_game,
      internal_id
    )
  ) %>%
  add_column(year = "2017", .before = 1)
col_removed_candy_2017
2017 - rename: get rid of the q1/q2/q3/q4/q5/q6 prefix at the start of the column names, then rename to match 2015 and 2016.
# Strip the survey question prefix ("q1_" ... "q6_") from the 2017 column names.
#
# rename_with() hands the current names to the function, so the lambda no
# longer has to reach back into col_removed_candy_2017, and the superseded
# rename_all() is avoided. The pattern is anchored on "q" + digits + "_" so it
# can only remove a question prefix (e.g. "year" is left alone by construction).
# Note that "q6_100_grand_bar" becomes "100_grand_bar", which is a
# non-syntactic name and needs backticks when referred to afterwards.
candy_2017_q_removed <- col_removed_candy_2017 %>%
  rename_with(~ sub("^q[0-9]+_", "", .x))
# Shorten and harmonise the 2017 column names so they line up with the names
# used for 2015 and 2016. `100_grand_bar` needs backticks because it starts
# with a digit.
candy_2017_renamed <- rename(
  candy_2017_q_removed,
  trick_or_treating = going_out,
  state_or_prov = state_province_county_etc,
  x100_grand_bar = `100_grand_bar`,
  mary_janes = anonymous_brown_globs_that_come_in_black_and_orange_wrappers_a_k_a_mary_janes,
  bonkers = bonkers_the_candy,
  restaurant_candy = candy_that_is_clearly_just_the_stuff_given_out_for_free_at_restaurants,
  chick_o_stick = chick_o_sticks_we_don_t_know_what_that_is,
  gummy_bears = gummy_bears_straight_up,
  hersheys_milk_chocolate = hershey_s_milk_chocolate,
  licorice_black = licorice_yes_black,
  peanut_m_ms = peanut_m_m_s,
  green_m_ms = green_party_m_ms,
  lone_m_ms = independent_m_ms,
  reeses_peanut_butter_cups = reese_s_peanut_butter_cups,
  sourpatch_kids = sourpatch_kids_i_e_abominations_of_nature,
  sweetarts = sweet_tarts,
  sweetums = sweetums_a_friend_to_diabetes,
  circus_peanuts = those_odd_marshmallow_circus_peanut_things,
  toblerone = tolberone_something_or_other
)
candy_2017_renamed
# Open each renamed tibble in the data viewer for a manual check of the
# column names (inspection only; nothing is assigned).
view(candy_2015_renamed)
view(candy_2016_renamed)
view(candy_2017_renamed)
Getting an idea of people’s responses:
# Exploratory: list the distinct values in the demographic columns and in one
# candy column (starburst) for each year, to see what cleaning is needed.

# 2015
candy_2015_renamed %>% distinct(age) # (chr...) 146 responses, some silly and some strange
candy_2015_renamed %>% distinct(trick_or_treating) # yes or no (chr)
candy_2015_renamed %>% distinct(starburst)

# 2016
candy_2016_renamed %>% distinct(age) # chr 98 incl silly/strange
candy_2016_renamed %>% distinct(trick_or_treating) # Yes No (chr)
candy_2016_renamed %>% distinct(gender) # Male, Female, Other, I'd rather not say, NA
candy_2016_renamed %>% distinct(country) # 93 some silly, some e.g. USA, US, us, u.s.a. etc
candy_2016_renamed %>% distinct(starburst)

# 2017
candy_2017_renamed %>% distinct(age) # chr 107 incl silly/strange
candy_2017_renamed %>% distinct(trick_or_treating) # Yes No and NA (chr)
candy_2017_renamed %>% distinct(gender) # Male, Female, Other, I'd rather not say, NA
candy_2017_renamed %>% distinct(country) # 118 some silly, some e.g. USA, US, us, u.s.a. etc
candy_2017_renamed %>% distinct(starburst)
Join all three
# Stack the three cleaned years into a single tibble.
bound_candy <- list(candy_2015_renamed, candy_2016_renamed, candy_2017_renamed) %>%
  bind_rows()

# Move the respondent-detail columns forward one at a time, targeting
# positions 4, 5 and 6 in that order.
bound_candy <- relocate(bound_candy, country, .before = 4)
bound_candy <- relocate(bound_candy, state_or_prov, .before = 5)
bound_candy <- relocate(bound_candy, gender, .before = 6)
view(bound_candy)
—————- COUNTRY CLEANING ————————–
Note that 2015 (5630 rows of 9349) has no country data…
5715 rows in bound_candy has NA
library(stringr)
# Normalise the free-text `country` column of the combined data set.
# Each str_replace_all() rewrites `country` in place and the rules run strictly
# top to bottom, so later rules see the output of earlier ones -- the order
# matters. US spellings are collapsed to "States", UK spellings to
# "United Kingdom" and Canadian spellings to "Canada". All patterns are
# case-sensitive except where both cases are listed explicitly in a class.
bound_country_clean <- bound_candy %>%
# Numeric entries are mapped to "States" (presumably ages typed into the
# country field -- TODO confirm against the raw data).
mutate(country = str_replace_all(country, pattern = "[0-9][0-9][.][0-9]", "States"),
# NOTE(review): inside [...] the "|" is a literal character, not alternation,
# so this class matches 3, 4, 5 or "|". The same applies to the [uU|eE],
# [tT|eE], [sS|dD] and [gG|dD] classes further down.
country = str_replace_all(country, pattern = "[3|4|5][0-9]", "States"),
country = str_replace_all(country, pattern = "^[uU]+[sS]+[aA]+", "States"),
country = str_replace_all(country, pattern = "^[uU]\\.[sS]\\.[aA]\\.", "States"),
country = str_replace_all(country, pattern = "^[uU]\\s[sS]\\s[aA]", "States"),
country = str_replace_all(country, pattern = "^[uU]\\.[sS]\\.", "States"),
country = str_replace_all(country, pattern = "^[uU][sS]", "States"),
country = str_replace_all(country, pattern = "^[uU]\\s[sS]", "States"),
country = str_replace_all(country, pattern = "states", "States"),
country = str_replace_all(country, pattern = "Statess", "States"),
country = str_replace_all(country, pattern = "USSA", "States"),
# NOTE(review): the "^" anchor means a value with leading whitespace is not
# matched by the next two rules; trimming first (e.g. str_trim) may be what is
# missing -- confirm against the values that fail.
country = str_replace_all(country, pattern = "^[uU][nN][iI][tT][eE][dD]+\\s[sS][tT][aA][tT][eE][sS]", "States"),
country = str_replace_all(country, pattern = "^[uU][nN][iI][tT][eE][dD]+\\s[sS][tT][aA][tT][eE]", "States"),
# the above doesn't work for all... i presume there are spaces somewhere
country = str_replace_all(country, pattern = "[uU][nN][iI][tT|eE][sS|dD]\\s[sS][tT][aA][tT][eE][sS]", "States"),
country = str_replace_all(country, pattern = "[uU][nN][iI][tT][eE][dD]\\s[sS][tT][eE|sS][tT][eE][sS]", "States"),
country = str_replace_all(country, pattern = "[uU][nN][iI][tT][eE][dD]\\s[sS][aA][tT][eE][sS]", "States"),
country = str_replace_all(country, pattern = "^[mM][uU|eE][rR][iI][cC][aA]", "States"),
country = str_replace_all(country, pattern = "^[aA][mM][eE][rR][iI][cC][aA]", "States"),
country = str_replace_all(country, pattern = "^\\'[mM][uU|eE][rR][iI][cC][aA]", "States"),
country = str_replace_all(country, pattern = "[sS][tT][aA][tT][eE][sS][!]", "States"),
# NOTE(review): "cascadia" only matches the all-lower-case spelling; a value
# spelled "Cascadia" is untouched here and is later caught by the
# "^[cC][a-zA-Z]{5}" rule in the Canada section below.
country = str_replace_all(country, pattern = "cascadia", "States"),
country = str_replace_all(country, pattern = "The republic of Cascadia", "States"),
#above cascadia doesn't work
country = str_replace_all(country, pattern = "the best ", ""),
country = str_replace_all(country, pattern = " of [aA]merica", ""),
country = str_replace_all(country, pattern = " USA USA USA", ""),
# how do i make it USA 1 to 3 times?
# NOTE(review): a quantified group such as "( USA){1,3}" expresses one to three
# repetitions in a single pattern.
country = str_replace_all(country, pattern = " USA USA", ""),
country = str_replace_all(country, pattern = " USA", ""),
# the above made "not the USA or canada" into "not the or canada"
country = str_replace_all(country, pattern = "[a-zA-Z]+ [-]+ [uU][sS][aA]", "States"),
# the above only changed "the best one - usa" to "the best States"
# NOTE(review): the unescaped "." characters in the next pattern (and in
# "N. America" below) match any character, not just a literal dot.
country = str_replace_all(country, pattern = "Sub-Canadian North America... 'Merica", "States"),
country = str_replace_all(country, pattern = "unhinged ", ""),
country = str_replace_all(country, pattern = " of A", ""),
country = str_replace_all(country, pattern = "unite States", "States"),
country = str_replace_all(country, pattern = "The United [sS]", "S"),
country = str_replace_all(country, pattern = "North Carolina", "States"),
country = str_replace_all(country, pattern = "Pittsburgh", "States"),
country = str_replace_all(country, pattern = "New York", "States"),
country = str_replace_all(country, pattern = "Trumpistan", "States"),
country = str_replace_all(country, pattern = "UD", "States"),
country = str_replace_all(country, pattern = "New Jersey", "States"),
country = str_replace_all(country, pattern = "murrika", "States"),
country = str_replace_all(country, pattern = "Alaska", "States"),
country = str_replace_all(country, pattern = "N. America", "States"),
country = str_replace_all(country, pattern = "I don't know anymore", "States"),
country = str_replace_all(country, pattern = "States[sS|dD|aA]", "States"),
# Deletes everything from the first "!" to the end of the value.
country = str_replace_all(country, pattern = "!.*", ""),
# this got rid of lots but "USA! USA! USA!" is now "StatesUSAUSA"
country = str_replace_all(country, pattern = "StatesUSAUSA", "States"),
country = str_replace_all(country, pattern = "^[uU][nN][iI][tT][eE][a-zA-Z]+\\s[sS][tT][aA][tT|eE][eE|sS]", "States"),
country = str_replace_all(country, pattern = "^[uU][kK]", "United Kingdom"),
country = str_replace_all(country, pattern = "^[uU][.]*[kK][.]*", "United Kingdom"),
country = str_replace_all(country, pattern = "^[uU][nN][iI][tT][eE][dD]+\\s[kK][iI][nN][dD][oO][mM]", "United Kingdom"),
country = str_replace_all(country, pattern = "^[uU][nN][iI][tT][eE][dD]+\\s[kK][iI][nN][gG][dD][oO][mM]", "United Kingdom"),
country = str_replace_all(country, pattern = "^[eE][nN][gG|dD][lL][aA][nN][dD]", "United Kingdom"),
country = str_replace_all(country, pattern = "Scotland", "United Kingdom"),
# NOTE(review): this rewrites the first six letters of ANY value that starts
# with c/C followed by five more letters, not just spellings of Canada. E.g.
# "California" -> "Canadarnia" and "Cascadia" -> "Canadaia", which the next two
# rules then collapse to "Canada"; "Croatia" -> "Canadaa" -> "Canada" via the
# last rule. Confirm this is intended.
country = str_replace_all(country, pattern = "^[cC][a-zA-Z]{5}", "Canada"),
country = str_replace_all(country, pattern = "^[cC][a-zA-Z]{5}rnia", "Canada"),
country = str_replace_all(country, pattern = "^[cC][a-zA-Z]{5}ia", "Canada"),
# NOTE(review): the next two patterns end in a literal backtick, so they only
# match values that actually contain a "`" at that position -- verify that
# this is what the data holds.
country = str_replace_all(country, pattern = "^Canada`", "Canada"),
country = str_replace_all(country, pattern = "soviet canuckistan`", "Canada"),
country = str_replace_all(country, pattern = "^Canadaa", "Canada")
)
# List the values that remain after cleaning, to spot anything still unhandled.
bound_country_clean %>%
distinct(country)
# view(bound_country_clean)
# still an issue:
# States? Hard to tell anymore..
# one "United States" not changing (probably spaces issue??)
# States (I think but it's an election year so who can really tell)
# I pretend to be from Canada, but I am really from the United States.
# Ahem....Amerca
# there isn't one for old men
# one of ones (previously "one of the good ones"?)
# The Yoo Essaayyyyyy
# this one
# neverland
# somewhere
# god's country
# EUA
# See above
# Not the or Canada (previously not the US or canada)
# Denial
# Earth
# insanity lately
# A
# Can
# Canae
# Atlantis
# Narnia
# 1
# subscribe to dm4uz3 on youtube
# Fear and Loathing
2016 testing
# Test bed: the country-cleaning rules applied to the 2016 data only.
# Each str_replace_all() rewrites `country` in place and the rules run strictly
# top to bottom, so later rules see the output of earlier ones. Commented-out
# lines are earlier attempts kept for reference.
country_clean_2016 <- candy_2016_renamed %>%
mutate(country = str_replace_all(country, pattern = "^[uU]+[sS]+[aA]+", "States"),
country = str_replace_all(country, pattern = "^[uU]\\.[sS]\\.[aA]\\.", "States"),
country = str_replace_all(country, pattern = "^[uU]\\.[sS]\\.", "States"),
country = str_replace_all(country, pattern = "^[uU][sS]", "States"),
# NOTE(review): by the time this rule runs, the first rule above has already
# turned a leading "USA" into "States", so a value that began as
# "USA! USA! USA!" no longer contains three "USA!" in a row and cannot match.
country = str_replace_all(country, pattern = "[U][S][A][!]\\s[U][S][A][!]\\s[U][S][A][!]", "States"),
# didn't work... try again below
country = str_replace_all(country, pattern = "[sS]tates", "States"),
country = str_replace_all(country, pattern = "[uU][sS][sS][aA]", "States"),
# country = str_replace_all(country, pattern = "[ *][uU][nN][iI][tT][eD]\\s[sS][tT][aA][tT][eE][sS]\\s[oO][fF]\\s[aA][mM][eE][rR][iI][cC][aA][ *]", "America"),
# united states of america still not working...
# NOTE(review): the "^" anchor means a value with leading whitespace is not
# matched by the anchored rules below -- confirm against the values that fail.
country = str_replace_all(country, pattern = "^[uU][nN][iI][tT][eE][dD]+\\s[sS][tT][aA][tT][eE][sS]", "States"),
# country = str_replace_all(country, pattern = "^\\s*[uU][nN][iI][tT][eE][dD]+\\s[sS][tT][aA][tT][eE][sS]\\s*", "States"),
#country = str_replace_all(country, pattern = "^[ ]*[uU][nN][iI][tT][eE][dD]+\\s[sS][tT][aA][tT][eE][sS][ ]*", "States"),
country = str_replace_all(country, pattern = "^[uU][nN][iI][tT][eE][dD]+\\s[sS][tT][aA][tT][eE]", "States"),
# the above doesn't work for all...
country = str_replace_all(country, pattern = "[uU][nN][iI][tT][sS]\\s[sS][tT][aA][tT][eE][sS]", "States"),
country = str_replace_all(country, pattern = "[uU][nN][iI][tT][eE][dD]\\s[sS][tT][eE][tT][eE][sS]", "States"),
country = str_replace_all(country, pattern = "[uU][nN][iI][tT][eE][dD]\\s[sS][aA][tT][eE][sS]", "States"),
# NOTE(review): inside [...] the "|" is a literal character, not alternation,
# so [uU|eE] also matches "|".
country = str_replace_all(country, pattern = "^[mM][uU|eE][rR][iI][cC][aA]", "States"),
country = str_replace_all(country, pattern = "^[aA][mM][eE][rR][iI][cC][aA]", "States"),
country = str_replace_all(country, pattern = "[sS][tT][aA][tT][eE][sS][!]", "States"),
# Deletes everything from the first remaining "!" to the end of the value.
country = str_replace_all(country, pattern = "!.*", ""),
#the ! argument isn't working! still get e.g. USA! USA! USA! as
#States! USA! USA!"
# country = str_replace_all(country, pattern = "^.*[!].*", "States"),
#country = str_replace_all(country, pattern = "^[Uu]+[Ss]+[Aa]+[!{1, .}]", "America")
# country = str_replace_all(country, pattern = "^[uU]+[sS]+[aA]+[:punct:]+", "America")
country = str_replace_all(country, pattern = "[a-zA-Z]+ [-]+ [uU][sS][aA]", "States"),
# the above only changed "the best one - usa" to "the best States"
# The next pattern is unanchored, so it replaces "Cascadia" wherever it occurs
# inside a longer value as well.
country = str_replace_all(country, pattern = "Cascadia", "States"),
# one of these has changed to The republic of States
country = str_replace_all(country, pattern = "Trumpistan", "States"),
country = str_replace_all(country, pattern = "Sub-Canadian North America... 'Merica", "States"),
country = str_replace_all(country, pattern = "[0-9][0-9][.][0-9]", "States"),
country = str_replace_all(country, pattern = "the best ", ""),
country = str_replace_all(country, pattern = " of [aA]merica", ""),
country = str_replace_all(country, pattern = " USA USA USA", ""),
# how do i make it USA 1 to 3 times?
# NOTE(review): a quantified group such as "( USA){1,3}" expresses one to three
# repetitions in a single pattern.
country = str_replace_all(country, pattern = " USA USA", ""),
country = str_replace_all(country, pattern = " USA", ""),
# the above made "not the USA or canada" into "not the or canada"
country = str_replace_all(country, pattern = "^[uU][kK]", "United Kingdom"),
country = str_replace_all(country, pattern = "^[uU][nN][iI][tT][eE][dD]+\\s[kK][iI][nN][dD][oO][mM]", "United Kingdom"),
country = str_replace_all(country, pattern = "^[eE][nN][gG][lL][aA][nN][dD]", "United Kingdom"),
country = str_replace_all(country, pattern = "^[cC]anada", "Canada")
)
# view(country_clean_2016)
# List the values that remain after cleaning, to spot anything still unhandled.
country_clean_2016 %>%
distinct(country)
NA
NA
2017 testing
# there must be a way to say U and anything after, space, S and anything after
# Collapse the free-text `country` answers of the 2017 survey onto "States",
# "United Kingdom" and "Canada".
#
# The rules are regular expressions. Passing them as a named vector
# (pattern = replacement) makes str_replace_all() apply them one after
# another, top to bottom, so later rules see the output of earlier ones.
# The order is significant - do not sort or regroup them.
country_clean_2017 <- candy_2017_renamed %>%
  mutate(country = str_replace_all(country, c(
    # --- "USA", "U.S.A.", "U.S.", "US", "U S" at the start of the answer ---
    "^[uU]+[sS]+[aA]+" = "States",
    "^[uU]\\.[sS]\\.[aA]\\." = "States",
    "^[uU]\\.[sS]\\." = "States",
    "^[uU][sS]" = "States",
    "^[uU]\\s[sS]" = "States",
    # A leading "USA" has already become "States" by now, so this only fires
    # when the chant is not at the very start; "USA! USA! USA!" itself is
    # finished off by the "States!", "!.*" and "( USA)+" rules further down.
    "[U][S][A][!]\\s[U][S][A][!]\\s[U][S][A][!]" = "States",
    "[sS]tates" = "States",
    "[uU][sS][sS][aA]" = "States",

    # --- "United States" and its misspellings ------------------------------
    # Earlier notes say some "united states of america" spellings still slip
    # through; check the distinct() output below.
    "^[uU][nN][iI][tT][eE][dD]+\\s[sS][tT][a-zA-Z][tT][eE][sS]" = "States",
    "^[uU][nN][iI][eE][dD]+\\s[sS][tT][a-zA-Z][tT][eE][sS]" = "States",
    # Was "[tT|eE][eE|sS]": inside [...] a "|" is a literal character.
    "^[uU][nN][iI][tT][eE][a-zA-Z]+\\s[sS][tT][aA][tTeE][eEsS]" = "States",
    "[uU][nN][iI][tT][sS]\\s[sS][tT][aA][tT][eE][sS]" = "States",
    "[uU][nN][iI][tT][eE][dD]\\s[sS][tT][eE][tT][eE][sS]" = "States",
    "[uU][nN][iI][tT][eE][dD]\\s[sS][aA][tT][eE][sS]" = "States",

    # --- "Murica" / "'Merica" / "America" ----------------------------------
    # Both were "[uU|eE]"; the stray "|" is removed.
    "^[mM][uUeE][rR][iI][cC][aA]" = "States",
    "^\\'[mM][uUeE][rR][iI][cC][aA]" = "States",
    "^[aA][mM][eE][rR][iI][cC][aA]" = "States",

    # --- trailing letters and exclamation marks ----------------------------
    # Was "[!|a-zA-Z]"; swallows one "!" or letter directly after "States".
    "[sS][tT][aA][tT][eE][sS][!a-zA-Z]" = "States",
    "!.*" = "",

    # --- one-off answers ---------------------------------------------------
    # Turns "the best one - usa" into "the best States"; the "the best " rule
    # below then removes the prefix.
    "[a-zA-Z]+ [-]+ [uU][sS][aA]" = "States",
    "the best " = "",
    " of [aA]merica" = "",
    # One or more " USA" left over after the leading "USA" became "States".
    # Anchored on "States" so that an answer such as "not the USA or canada"
    # is no longer cut down to "not the or canada".
    "^States( USA)+" = "States",
    "StatesSAUSA" = "States",
    "unhinged " = "",
    " of A" = "",
    "unite States" = "States",
    "The United [sS]" = "S",

    # --- US states, cities and nicknames given instead of a country --------
    "North Carolina" = "States",
    "Pittsburgh" = "States",
    "New York" = "States",
    "Trumpistan" = "States",
    # NOTE(review): unanchored, so "UD" inside a longer answer is rewritten
    # as well - confirm that cannot happen in the data.
    "UD" = "States",
    "New Jersey" = "States",
    "murrika" = "States",
    "Alaska" = "States",
    "N. America" = "States",
    "I don't know anymore" = "States",
    # The old Canada rule turned this into "Canadarnia" and then "Canada".
    "^[cC]alifornia" = "States",
    # Mirrors the 2016 cleaning, which maps Cascadia to States; the old
    # Canada rules would have turned it into "Canada".
    "^[cC]ascadia" = "States",
    # Was "[3|4][0-9]".
    # NOTE(review): two-digit answers in the 30s and 40s are mapped to
    # States - confirm that is intended.
    "[34][0-9]" = "States",

    # --- United Kingdom ----------------------------------------------------
    # NOTE(review): also matches the start of longer words that begin with
    # "uk" - confirm none occur in the data.
    "^[uU][.]*[kK][.]*" = "United Kingdom",
    "^[uU][nN][iI][tT][eE][dD]+\\s[kK][iI][nN][dD][oO][mM]" = "United Kingdom",
    "^[uU][nN][iI][tT][eE][dD]+\\s[kK][iI][nN][gG][dD][oO][mM]" = "United Kingdom",
    # Was "[gG|dD]"; accepts "England" and the typo "Endland".
    "^[eE][nN][gGdD][lL][aA][nN][dD]" = "United Kingdom",
    "Scotland" = "United Kingdom",

    # --- Canada ------------------------------------------------------------
    # The previous rule, "^[cC][a-zA-Z]{5}", rewrote the first six letters of
    # ANY answer starting with c (e.g. "Colombia" -> "Canadaia") and needed
    # follow-up patches that turned those into "Canada". Match the real
    # spelling instead, in any letter case, with an optional stray backtick.
    "^[cC][aA][nN][aA][dD][aA]`?" = "Canada",
    # Finishes the abandoned "^Can" / "^Canae" attempts: anchoring both ends
    # stops the rule from also rewriting the start of "Canada".
    "^Can(ae)?$" = "Canada"
  )))

# List the spellings that remain, to see what still needs a rule.
# view(country_2017)
country_clean_2017 %>%
  distinct(country)

# Rows with no country at all:
# country_2017 %>%
#   select(country, state_or_prov) %>%
#   filter(is.na(country))
# 4 rows have an American state under "state_or_prov"
AGE QUESTION
# Stack the three survey years into one table, then move country,
# state_or_prov and gender in front of columns 4, 5 and 6 in turn.
bound_candy <- list(
  candy_2015_renamed,
  country_clean_2016,
  country_clean_2017
) %>%
  bind_rows() %>%
  relocate(country, .before = 4) %>%
  relocate(state_or_prov, .before = 5) %>%
  relocate(gender, .before = 6)

# Open the combined table in the data viewer for a manual check.
view(bound_candy)